{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import matplotlib as mpl\n",
    "\n",
    "def rand_cmap(nlabels, type='bright', first_color_black=True, last_color_black=False, verbose=True, randRGBcolors = [], bounds = [], subplot_ = 1, figsize = 40, ax_ = None, label = ''):\n",
    "    from matplotlib.colors import LinearSegmentedColormap\n",
    "    import colorsys\n",
    "    import numpy as np\n",
    "    if randRGBcolors:\n",
    "        random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels)\n",
    "    if type == 'bright':\n",
    "        randHSVcolors = [(np.random.uniform(low=0.0, high=1),\n",
    "                          np.random.uniform(low=0.2, high=1),\n",
    "                          np.random.uniform(low=0.9, high=1)) for i in range(nlabels)]\n",
    "        # Convert HSV list to RGB\n",
    "        randRGBcolors = []\n",
    "        for HSVcolor in randHSVcolors:\n",
    "            randRGBcolors.append(colorsys.hsv_to_rgb(HSVcolor[0], HSVcolor[1], HSVcolor[2]))\n",
    "        if first_color_black:\n",
    "            randRGBcolors[0] = [0, 0, 0]\n",
    "        if last_color_black:\n",
    "            randRGBcolors[-1] = [0, 0, 0]\n",
    "        random_colormap = LinearSegmentedColormap.from_list('new_map', randRGBcolors, N=nlabels)\n",
    "    # Display colorbar\n",
    "    if verbose:\n",
    "        from matplotlib import colors, colorbar\n",
    "        from matplotlib import pyplot as plt\n",
    "        if ax_ is None:\n",
    "            fig, ax_ = plt.subplots(1, 1, figsize=(figsize, 0.5))\n",
    "        bounds = np.linspace(0, nlabels, nlabels + 1) if bounds == [] else bounds\n",
    "        norm = colors.BoundaryNorm(bounds, nlabels)\n",
    "        cb = colorbar.ColorbarBase(ax_, cmap=random_colormap, norm=norm, spacing='proportional', ticks=[],\n",
    "                                   boundaries=bounds, format='%1i', orientation=u'horizontal')\n",
    "        if label != '':\n",
    "            cb.ax.tick_params(labelsize='large')\n",
    "            cb.set_label(label,size=70)\n",
    "    return random_colormap, randRGBcolors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: '../results/output_hg19_100.txt'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m/var/folders/b9/y5gxsv6j64v1l3z4bmlx4pwc0000gn/T/ipykernel_17865/1249707977.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0mbiggest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0mnpip_dict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../results/output_hg19_100.txt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     19\u001b[0m     \u001b[0mline\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'\\t'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m     \u001b[0mlen_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../results/output_hg19_100.txt'"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Figure size 2880x360 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "set_all_elems = set()\n",
    "import random\n",
    "def process(elem, sign):\n",
    "    if len( elem.split('(') ) > 1:\n",
    "\n",
    "        elem_ret = elem.split('(')[1]\n",
    "        begin, len2, id, strand = elem_ret.split(',')\n",
    "        if strand.strip() == sign:\n",
    "            return int(begin), int(len2), id\n",
    "all_regions = []\n",
    "figsize = 40\n",
    "fig = plt.figure(figsize=(40, 5))\n",
    "\n",
    "axes = [[0.05, 1, 0.9, 0.15]]\n",
    "axses_real = []\n",
    "biggest = 0\n",
    "npip_dict = dict()\n",
    "for i in open('../results/output_hg19_100.txt'):\n",
    "    line = i.split('\\t')\n",
    "    len_ = int(line[2]) - int(line[1])\n",
    "    if len_ > biggest:\n",
    "        biggest = len_\n",
    "for i in open('../results/output_hg19_100.txt'):\n",
    "    line = i.split('\\t')\n",
    "    s1 = line[4][1:-2]\n",
    "    len_ = int(line[2]) - int(line[1])\n",
    "    elems = s1.split(')')\n",
    "    elems_real = []\n",
    "    set_help = set()\n",
    "\n",
    "\n",
    "    for j in elems:\n",
    "        rez = process(j, '+')\n",
    "        if not rez or len(rez) == 0:\n",
    "            continue\n",
    "        if rez:\n",
    "            b,l,id = rez\n",
    "            elems_real.append((b,l,id))\n",
    "            set_help.add(id)\n",
    "    all_regions.append((line[0], int(line[1]), int(line[2]) , line[3] , elems_real ))\n",
    "    npip_dict[line[3]] = [(line[0], int(line[1]), int(line[2]) , line[3] , elems_real )]\n",
    "\n",
    "    elems_real = []\n",
    "\n",
    "    for j in elems:\n",
    "        rez = process(j, '_')\n",
    "        if not rez or len(rez) == 0:\n",
    "            continue\n",
    "        if rez:\n",
    "            b,l,id = rez\n",
    "            elems_real.append((b,l,id))\n",
    "            set_help.add(id)\n",
    "    all_regions.append((line[0], int(line[1]), int(line[2]) , line[3] , elems_real ))\n",
    "    npip_dict[line[3]].append((line[0], int(line[1]), int(line[2]) , line[3] , elems_real ))\n",
    "\n",
    "    # print (elems_real)\n",
    "    # print (len(elems_real), len(set(set_help)))\n",
    "    set_all_elems = set_all_elems.union(set_help)\n",
    "print (f'len of elems: {len(set_all_elems)}')\n",
    "dict_colors = dict()\n",
    "cmap, randRGBcolors = rand_cmap(len(set_all_elems), type='bright', first_color_black=False, last_color_black=False, verbose=False)\n",
    "dict_colors['0'] = (0,0,0)\n",
    "assert len(randRGBcolors) == len(set_all_elems) \n",
    "list_all_elems = list(set_all_elems)\n",
    "for i in range(len(randRGBcolors)):\n",
    "    dict_colors[list_all_elems[i]] = randRGBcolors[i]\n",
    "# print (dict_colors)\n",
    "count = 0\n",
    "npip_list = ['NPIPA5', 'NPIPA7', 'NPIPA8', 'NPIPA1', 'NPIPA2', 'NPIPA3', 'NPIPB12', 'NPIPB15', 'NPIPB7', 'NPIPB8', 'NPIPB9', 'NPIPB6', 'NPIPB11', 'NPIPB13', 'NPIPB4', 'NPIPB5', 'NPIPB3']\n",
    "# for npip in all_regions:\n",
    "for npip1 in npip_list:\n",
    "    len_ = npip_dict[npip1][0][2] - npip_dict[npip1][0][1]\n",
    "    new_axe = axes[-1]\n",
    "    new_axe[1] = new_axe[1]  - 0.7\n",
    "    new_axe[2] = biggest / len_  \n",
    "    ax1 = fig.add_axes(new_axe)\n",
    "\n",
    "\n",
    "    # for new negative one\n",
    "    new_axe = axes[-1]\n",
    "    new_axe[1] = new_axe[1]  - 0.3\n",
    "    new_axe[2] = biggest / len_  \n",
    "    ax2 = fig.add_axes(new_axe)\n",
    "\n",
    "    for npip in npip_dict[npip1]:\n",
    "        npip_elems = npip[4]\n",
    "        max_gap = 0\n",
    "        count += 1\n",
    "        list_locations = []\n",
    "        list_locations.append(0)\n",
    "        begin = npip[1]\n",
    "        list_colors = [dict_colors['0']]\n",
    "        for i in range(0, len(npip_elems) - 1):\n",
    "            gap = npip_elems[i + 1][0] - (npip_elems[i][0] + npip_elems[i][1])\n",
    "            list_locations.append(npip_elems[i][0] - begin)\n",
    "            list_colors.append(dict_colors[npip_elems[i][2]])\n",
    "            if gap > 0:\n",
    "                list_locations.append(npip_elems[i][0] + npip_elems[i][1] - begin)\n",
    "                list_colors.append(dict_colors['0'])\n",
    "                if gap > max_gap:\n",
    "                    max_gap = gap\n",
    "        \n",
    "        list_locations.append(npip_elems[-1][0]- begin)\n",
    "        list_colors.append(dict_colors[npip_elems[-1][2]])\n",
    "\n",
    "        list_locations.append(npip[2] - npip[1])\n",
    "        list_colors.append(dict_colors['0'])\n",
    "        \n",
    "        for i in range(0, len(list_locations) - 1):\n",
    "            if list_locations[i] > list_locations[i+1]:\n",
    "                print (list_locations[i] , list_locations[i+1], i, i+1 , len(list_locations))\n",
    "                assert False\n",
    "        label=''\n",
    "        ax = ax1\n",
    "        if(count % 2 == 0):\n",
    "            label=npip[3]\n",
    "            ax = ax2\n",
    "\n",
    "        cmap, randRGBcolors= rand_cmap(len(list_locations), type='0', first_color_black=False, last_color_black=False, verbose=True, randRGBcolors = list_colors, bounds = list_locations, subplot_ = count, figsize = figsize, ax_ = ax, label=label)\n",
    "\n",
    "        assert (len(list_locations) == len(list_colors))\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.522523\n",
      "['NPIPA2', 'NPIPA3', 'NPIPA5', 'NPIPA1', 'NPIPA8', 'NPIPA7', 'NPIPB3', 'NPIPB4', 'NPIPB5', 'NPIPB6', 'NPIPB7', 'NPIPB8', 'NPIPB9', 'NPIPB11', 'NPIPB12', 'NPIPB13', 'NPIPB15']\n",
      "[Clade(name='NPIPA1'), Clade(name='NPIPA8'), Clade(name='NPIPA2'), Clade(name='NPIPA7'), Clade(name='NPIPA5'), Clade(name='NPIPA3'), Clade(name='NPIPB4'), Clade(name='NPIPB9'), Clade(name='NPIPB12'), Clade(name='NPIPB11'), Clade(name='NPIPB8'), Clade(name='NPIPB15'), Clade(name='NPIPB13'), Clade(name='NPIPB3'), Clade(name='NPIPB6'), Clade(name='NPIPB7'), Clade(name='NPIPB5')]\n",
      "1\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAECCAYAAADw0Rw8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAeTElEQVR4nO3df3xU9Z3v8dfHsLgKFFHU64W4VCpdLbYshVRZUfxxKYHegDWPGikiEI2hQeQKl1DlVitUgV4R78o1TQ0/XCSoXESgdRWtqQr+2FiGHwoqZVkBXQmI/LKAXL77x8ykISZkkpxzZubM+/l45JGZM2fO+X4d/Mw3Z77f95hzDhERCa/Tkt0AERHxlwq9iEjIqdCLiIScCr2ISMip0IuIhFybZDegIZ07d3bdunVLdjNERNLKu+++u8c5d2797SlZ6Lt160Z1dXWymyEiklbM7N8b2q5LNyIiIadCLyIScir0IiIhp0IvIhJyKvQiIiGnQi8iEnIq9CKSVrZv306HDh3YtWsXAAUFBQDk5ORQUlJCXl4en3zyCfn5+QDk5uYybtw4hg4dyoYNGzh27Bjdu3dn7dq1AGzbto3CwsLa/cMoJefRt9SAAQOS3QQR8UlVVVXt7cGDB/Pggw8yd+7c2m0XXnghc+fOZfHixbz55pu129u1a8djjz3G2rVrWb16NR988AH33nsvixYtol+/flx00UVUVFSEutCHZkQ/YMAAIpFIspshIgHIzs6mY8eObNy4sXbbjh07uOuuu1izZg2DBg2q3X748GEmTJjA/PnzGT58OM899xwjRozg8OHDHDx4MBnND1yoRvS9evU66V1fRMJr8uTJjBs3rvZ+dnY2jz766Nf2a9euHXPmzAGibwYbN25k/PjxfPbZZ1RWVlJUVBRUk5MmNCN6EcksZ511Ft///vepqalJ+Dnz58+nvLycsrIyVq5cyapVq9i7dy/FxcWsW7eOhx56yMcWJ4+l4lcJ9unTxzU36yZ+fV4jehHJVGb2rnOuT/3tGtGLiIScCr2ISMip0IuIhJwKvYhIyKnQi4iEnO+F3syyzexVM3vfzN4zs7v8OI8WS4mINCyIEf1xYKJz7lLgcqDEzC718gQDBgzg0KFDXh5SRNKQ1zk4H3/8McOGDWPMmDHMmDEjOZ3ygO+F3jn3qXPuT7HbB4HNQBevz9O+fXuvDykiaSieg1NXPAenoKCgwRyc0tJSVq9ezfPPP1+bgwOwceNG8vPzmTdvHuvWrQu0H14KNALBzLoB/wC8HeR5QYFnImFWd6FkdnY2bdu2bTAH5/jx48yaNYvKykrgrzk4hw8f5oEHHmDixIksWLCAP/7xjxw8eJDLL7+8ttDfcsstQXfLM4EVejNrD/w/YIJz7kADjxcBRRB99/VSPPCsV69enh5XRFKTVzk4Bw4c4Je//CVXXXUV+fn5jB49OqgueCqQQm9mf0O0yD/lnFvW0D7OuXKgHKIRCF63QYFnIpkjnoOzatWqhJ8Tz8G54oor+Oqrr7jxxht58MEHuf/++1m8eDHdunXzr8E+8z3rxswMWAh87pybkMhzmpt1U3fE3lAxVw6OiGSCZGbd/CNwC3CtmUViP4MDOK+IiBDApRvn3BuA+X0eERFpWKi+eCQSiTQ4u0YfxIpIJgtNoVchFxFpWGgKPTT+Yavm0ItIJlOomYhIyKnQi4iEnAq9iCSd12Fky5cv5/bbb+emm27ipZdeSk6nUkiortGfSmMzckQkeep+rhYPI5s7d27ttngY2eLFixsMI1u7di2rV6/mgw8+qA0j69evH8OGDWPYsGHs27ePSZMmMXDgwCC7lXIyYkRfVVWlWTkiKS47O5uOHTs2GEa2Zs0aBg0aVLs9HkY2f/58hg8fznPPPceIESM4fPgwBw8erN1v+vTplJSUBNqPVJQxI3rFH4ikPq/CyG6//XamTJlCbm4uvXv3Dqr5KStjCr2IpD6vwsiOHDnCyy+/zP79+9m6dSvFxcU+tjr1+R5q1hItCTUDjdpFJLMlM9RMRESSSIVeRCTkVOhFREJOhV5EJORU6EVEQi6wQm9mWWa2zswSnzclIiKtFuSI/i5gc4DnE5EM4HVOzuuvv05xcTG33XYb/fr1S06nPBbIgikz6woMAX4F3O3XeZRlI5I5/MrJ6d+/P/3792f58uX07ds3yC75JqgR/RxgMnCisR3MrMjMqs2suqamptkniEQiRCKRFjdQRNKXHzk5ixcvZvjw4YH2wy++j+jN7EfAbufcu2Y2oLH9nHPlQDlEV8a25Fy9evXS6liRDOVVTk5RUREff/wxHTt2pEOHDkE131dBjOj/Ecgzs+3AEuBaM1sUwHlFJIPEc3Kac0UgnpNTVlbGypUrazN2KioqGD16tF9NDVygWTexEf0k59yPTrVfS7JuIpGIRvQiktGUdSMikqECjSl2zlUBVUGeU0Qk02lELyIScir0IiIhp0IvIhJyoSn0hw4dSnYTRERSUmgKvYiINCw0hb59+/bJboKIJJnXAWdVVVX079+f4uLitF6jE+j0ymRT6JlIOPkVcGZmtG/fniNHjtC1a9cgu+Sp0IzomxJfPSsi4eZlwFn//v154YUXmDlzJvfdd18yuuOJjBrRKyJBJDN4GXAG0KlTJ44ePRpI2/2QUYVeRDJDPOAsHlKWiHjA2RVXXMFXX33FjTfeSOfOnXnxxRf54osvTnrjSDeBhpolyo9Qs/j1eY3oRSSsFGomIpKhQnXpJhKJNDqzJj7iFxHJNKEp9CriIiINC02hh1Nff9ccehHJVLpGLyIScoEUejM7y8yWmtkWM9tsZlcEcV4REQluRP8o8C/Oub8HvgdsDui8IpJmvM6r2bx5M8XFxeTn5/P4448np1NJ5vs1ejPrCFwFjAJwzh0Djvl93oacalaOiCSXX3k1l1xyCWVlZZw4cYKRI0cyduzYILuVEoIY0X8TqAHmm9k6M3vCzNrV38nMisys2syqa2pqPG9EVVWVZuaIpAkv82oAVqxYwZAhQxg8eHDgfUkFQcy6aQP0Bu50zr1tZo8CU4D/VXcn51w5UA7RlbF+NESrYkXSh5d5NXl5eeTl5TFkyBCGDx8eVBdSRhCFfiew0zn3duz+UqKFXkSkUV7l1fTo0YNly5Zx9OjRjB3RB5J1Y2avA7c55z4ws/uBds65/9nY/i3JugGN2EUkszWWdRPUgqk7gafMrC2wDRgd0HlFRDJeIIXeORcBvvYuIyIi/tPKWBGRkFOhFxEJORV6EZGQU6EXEQk5FXoR8URrM2pGjRrFHXfcwejRo6moqKg97rx587j66quD71CIhCqPvjmUeSPiDa8yagAefvhhzjjjDMaMGUNhYSHbtm1jz549nHvuuYH1J4wyckQf/zJxEfFWazJqAEpLS8nPz2fgwIGcOHGChx9+mAkTJgTdjdDJ2BF9r169tJJWxActyaiJmzlzJu3bt+emm27iBz/4AXv27GHy5MmsX7+e3//+9xkbYdBaGVvoRcQfLcmoiZs4cSJZWVlceOGFfOtb3+Lpp58GYOfOnSryrRBI1k1z+Z11o2wcEQmjxrJuMvIavYhIJlGhFxEJORV6EZGQU6EXEQk5FXoRkZBToRcRCblA5tGb2f8AbgMcsBEY7Zw74tf5moo3iEQi9OrVy6/Ti4ikFN9H9GbWBRgP9HHO9QSygAK/zqd4A5HU50cA2tSpUykqKmLs2LF8+eWXyelYigpqZWwb4Awz+wo4E/jEz5M1FW+QaKCZgs9EvOVnANqmTZtYvnw5zzzzDMuWLWPEiBGB9SvV+T6id87tAv438DHwKbDfOfdS/f3MrMjMqs2suqamxu9mNUl/GYj4y8sANIAf//jH3Hnnnbz++uvs3Lkz2M6kON9H9GbWCRgKfBP4AnjWzEY45xbV3c85Vw6UQzQCwe92JULBZyL+8ioA7ac//SkjR45k5MiRVFRUcM455/jd9LQSxKWb64F/c87VAJjZMqAfsOiUzxKR0PMqAA1gzpw5fPjhh2RlZTX4ZpHJfA81M7MfAPOAvsBfgAVAtXPunxp7TmtCzRIJLPNqHxGRVJK0UDPn3NvAUuBPRKdWnkbsEo2IiPgvkFk3zrn7gPuCOJeIiJxMK2NFREJOhV5EJOROeenGzIYCw4FvAEedc8OCaFQQIpHIKRdEKSZBRMKiqRH9IOB9ovPg3/K/OcGoqqpSEReRjNHUh7F7gXZADvBt/5sTnKamTSr+QPy0fft2LrvsMrZs2UKXLl0oKChgyZIl5OTk0LdvX3bs2EFZWRnjx49n6dKl5Obm0r17d3bs2MG0adOYPXs2p59+OseOHePKK6+ksLCQ2bNns2jRIp588kl69uzJ8uXL+d3vfseBAwcoLCysXUEqmaepQv8Y8P+BW4Ay/5uTmfSmkjn8zHq5++67OXDgQO1zhg0bxrBhw9i3bx+TJk1Soc9gTV26GQXsIVrwb/C9NRlImTqZy+usl8ZMnz6dkpISfzohaaGpEf2/AkuAE8As/5uTmZSpk7m8zHqpzznHlClTyM3NpXfv3p63XdJHU4X+UmA3cBbwX3xvjUiG8TLrZeHChaxatYrNmzczdepUXn31VV5++WX279/P1q1bKS4u9rr5kiZOmXVjZsOcc8tjt8c75/5PEI3yO+umucf0kzJ1RMQrjWXdNDWirzKzfKJfFvKFHw0TERF/NVXoH4nts5po3PCTvrcohTS1qMqrc2hOv4j4qalZN7uBT51zTwKbA2hPytCiKhEJi6ZG9GuAo2a2HLgYeMj3FqWQIK6baw69iPit0RG9mQ0BfgJkAb8FNgXVKBER8c6pLt10JvqNUOcCZwO/OtWBzGyeme02s011tp1tZqvN7KPY706etFpERBLWaKF3zi10zt0e+/3PzrkNTRxrAdEQtLqmAK845y4GXondFxFp1Pbt2+nQoQO7du0CoKCgAICcnBxKSkrIy8vjk08+IT8/H4Dc3FzGjRvH0KFD2bBhA6NGjeKOO+5g9OjRVFRUADB79mx69+7Npk3Rcei2bdsoLCysPUbYefYNU86518ysW73NQ4EBsdsLgSqg1KtzhkUQs3tEUl2QOUAXXXQRFRUVGVPo/f7ikfOdc5/Gbv8HcH5jO5pZkZlVm1l1TU2Nz81KHZrdI/J1QeUAZYpAvjMWwDnnzKzRZbjOuXJiXxrep0+fxpfrhpBWxYp8nZ85QJnG7xH9Z2Z2AUDs926fzyciIRHPAWrJX/gTJ07kZz/72ddygKZNm8bGjRvZu3cvxcXFrFu3joceCv+s8VNm3TT7YNFr9Kuccz1j938N7HXOzTCzKcDZzrnJTR0nFbJuRETSTWNZN56N6M2sEngT+LaZ7TSzQmAG8N/M7COiEQozvDqfiIgkxstZNzc38tB1Xp1DRESaz+9r9CIikmShKfTxuej6Wj4RkZOFotBrLrqISOMCm0fvt/gsG60wFRE5WShG9CIi0jgVehEJlB+hZdOnT2fMmDHccMMN7Ny5MzkdS2GhuXTTErrMIxIcP0PL3nvvPSorK6msrKS6upquXbsG1q90kLEjes3QEUker0PLrrnmGq699lp+85vfcN11WrpTX0aP6Hv16qWoBJEk8TK0bOXKlfzhD39gzZo1VFRUMGHCBJ9bn14yutCLSPLEQ8tWrVrV7OdOnDiRrKys2tCy73znOxQXF1NTU8MvfvELr5ua9jwNNfNKc0PN6ko01EzhZyISNr6HmomISGpSoRcRCTkVehGRkFOhFxEJudDPumlsUVQkElEQmohkBC+/YWqeme02s011tv3azLaY2QYze87MzvLqfInQoigREW8v3SwABtXbthro6Zz7LvAh8HMPz5eQ+KKo+j8azUui/MhmmT17Nr1792bTpui4qKqqiv79+1NcXKwpv+I5L79K8LXYl4PX3fZSnbtvAflenU8apwwfb/iZzXL33Xdz4MCB2ueYGe3bt+fIkSPKaRHPBflh7BjghcYeNLMiM6s2s+qampoAmxUuulzlD6+zWerr378/L7zwAjNnzuS+++7ztzOScQL5MNbM7gWOA081to9zrhwoh+jK2CDaFVbK8PGHl9ks9Z12WnTM1alTJ44ePeptwyXj+V7ozWwU8CPgOpeKeQsiCfIym2XhwoWsWrWKzZs3M3XqVD766CNefPFFvvjii5PeTES84GnWTewa/SrnXM/Y/UHAbOBq51zC12O8yro5VZ5NWLNuwtovEWma71k3ZlYJvAl828x2mlkh8BjQAVhtZhEzK/PqfCIikhgvZ93c3MDmCq+O74dIJBK6GSpaCCYi9WVsBILm0otIpgh9BMKphPE6dtj+QhGR1svYEb2ISKZQoRcRCTkVehEJlSCyiTZv3kxxcTH5+fk8/vjjSehl82T0NfqwCuNsIpGmBJlNdMkll1BWVsaJEycYOXIkY8eO9b+DraARfchoNpGI/9lEACtWrGDIkCEMHjzYv454RCP6EArjbCKR5vIzmwggLy+PvLw8hgwZUvsGkapU6EUklPzMJtq7dy/Lli3j6NGjaTGi9zTrxiutzbqJrw6N/9YIV0Qyge9ZN6lC16hFRE4Wyks38RG8Zp6IiIRwRC8iIidToRcRCTkVehGRkFOhFxEJOS+/YWqeme02s00NPDbRzJyZdfbqfCIikhgvR/QLgEH1N5pZNjAQ+NjDc4lIivMjXGzWrFncdtttDB06lM8//zw5HUtDXn6V4GuxLwev7xFgMvC8V+cKkqZoijSPn+FikydPBuCRRx5h69at5OTkBNOpNOfrNXozGwrscs6tT2DfIjOrNrPqmpoaP5uVsPgqWxFpGa/DxY4dO0ZJSQmvvPIKPXr0CLYzacy3BVNmdiZwD9HLNk1yzpUD5RCNQPCrXc2lCAWR1vEyXKxt27bMnTuXZ555hueff55bb73V7+aHgp8rY7sD3wTWmxlAV+BPZpbjnPsPH88rIinEy3Cx0tJSvvzyS/bt28fDDz/sdVNDy9NQs9g1+lXOuZ4NPLYd6OOc29PUcVoTalZX/Pp6S0fkrX2+iEiQfA81M7NK4E3g22a208wKvTq2iIi0nJezbm5u4vFuXp1LREQSp5WxIiIhp0IvIhJyKvQiIiGnQi8iEnKh/IapuiKRSItjDOLfOSsi3tm+fTuXXXYZW7ZsoUuXLhQUFLBkyRJycnLo27cvO3bsoKysjPHjx7N06VJyc3Pp3r07O3bsYNq0acyePZvTTz+dY8eOceWVV1JYWMgbb7zBkiVLyMrKYsqUKVxwwQXJ7mZKCXWhr6qqSpmsmlRph0iy+JmBM2fOHLp3705WVhZnn312YH1KF6Eu9NC6xU5eFed4Zo7+OhCJys7Opm3btg1m4Bw/fpxZs2ZRWVkJ/DUD5/DhwzzwwAP8/Oc/p7S09KTky/Xr17NkyRJefPFFnnrqKcaMGZOUfqWq0Bf6VKHMHJGTeZmBc8kll9CmTRs6derE1q1b/W562lGhF5Gk8DIDZ8SIEYwdO5bDhw8rA6cBnmbdeMWrrJvW8irrRpk5IhIE37NuREQkNanQi4iEnAq9iEjIqdCLiIScZt00oTUra+seQ3PoRSRZvPzikXlmttvMNtXbfqeZbTGz98xsllfnC0JVVZUKtIikPS9H9AuAx4An4xvM7BpgKPA959xRMzvPw/MFwospkYo/kEzkR6ZNaWkp+/fv5+2332bGjBn88Ic/THY304JnI3rn3GvA5/U2jwVmOOeOxvbZ7dX5RCT1xTNt6opn2hQUFDSYaVNaWnpSps0TTzzBa6+9BkRXxJaVldGlSxeuv/764DqS5vy+Rt8D6G9mvwKOAJOcc//q8zmllfQXiLRG3b+Cvc60AXjnnXfo3bs3WVlZgfUp3fld6NsAZwOXA32BZ8zsItfAclwzKwKKgNplzRI8BbCJ17zMtAF44oknuOeee3xtc9j4Xeh3Astihf0dMzsBdAZq6u/onCsHyiEageBzu+QUFMAmXvIy0+bgwYPs2bOHbt26edzKcPM068bMugGrnHM9Y/eLgf/qnPuFmfUAXgEubGhEX1eqZN14JZ2ybtKprSJyssaybjwb0ZtZJTAA6GxmO4H7gHnAvNiUy2PArU0VeRER8ZZnhd45d3MjD43w6hwiItJ8WhkbEC9W2AZBH8SKhI+ybgKgFbYikkwa0QckXT7cTIe/OkSkeTSiFxEJORV6EZGQU6EXEd9t376dDh06sGvXLgAKCgoAyMnJoaSkhLy8vJOiDnJzcxk3bhxDhw5lw4YNjBo1ijvuuIPRo0dTUVFRu09xcTGTJk1KTqfSiK7Ry9ekywwhSX11P5uKB5zNnTu3dls84Gzx4sUNBpytXbv2pICzM844gzFjxlBYWMiZZ57JiRMnOP/88wPrT7rSiF5OohlC4pfs7Gw6duzYYMDZmjVrGDRoUO32eMDZ/PnzGT58OAClpaXk5+czcOBAAJ599lnKy8v59NNP2bBhQ7CdSTMa0cvXpMsMIUk/XgacnXZadJx63nnncejQIV/bne5U6EUkMF4GnN16662ceeaZHD9+nMmTJ3vd1FDxNNTMK2ELNRMRCUJjoWa6Ri8iEnIq9CIiIadCLyIScir0IiIhp0IvIhJyKvQiIiGXktMrzawG+PdmPq0zsMeH5iSL+pPa1J/Ulqn9+Tvn3Ln1N6ZkoW8JM6tuaP5oulJ/Upv6k9rUn5Pp0o2ISMip0IuIhFyYCn15shvgMfUntak/qU39qSM01+hFRKRhYRrRi4hIA1ToRURCLu0KvZkNMrMPzGyrmU1p4PHTzezp2ONvm1m3JDQzYQn05yoz+5OZHTez/GS0sTkS6M/dZva+mW0ws1fM7O+S0c5EJdCfYjPbaGYRM3vDzC5NRjsT1VR/6ux3o5k5M0vpKYoJvD6jzKwm9vpEzOy2ZLQzUYm8Pmb2k9j/Q++Z2eKEDuycS5sfIAv4M3AR0BZYD1xab5+fAWWx2wXA08ludyv70w34LvAkkJ/sNnvQn2uAM2O3x4bg9flGndt5wL8ku92t6U9svw7Aa8BbQJ9kt7uVr88o4LFkt9XD/lwMrAM6xe6fl8ix021EnwNsdc5tc84dA5YAQ+vtMxRYGLu9FLjOzCzANjZHk/1xzm13zm0ATiSjgc2USH9edc59Gbv7FtA14DY2RyL9OVDnbjsglWc3JPL/D8A0YCZwJMjGtUCi/UkXifTndmCuc24fgHNudyIHTrdC3wXYUef+zti2Bvdxzh0H9gPnBNK65kukP+mkuf0pBF7wtUWtk1B/zKzEzP4MzALGB9S2lmiyP2bWG8h2zv0uyIa1UKL/3m6MXSpcambZwTStRRLpTw+gh5mtMbO3zGwQCUi3Qi8hYWYjgD7Ar5PdltZyzs11znUHSoGpyW5PS5nZacBsYGKy2+KhlUA359x3gdX89a/9dNWG6OWbAcDNwG/N7KymnpRuhX4XUPcduWtsW4P7mFkboCOwN5DWNV8i/UknCfXHzK4H7gXynHNHA2pbSzT39VkCDPOzQa3UVH86AD2BKjPbDlwOrEjhD2SbfH2cc3vr/Bt7Avh+QG1riUT+ve0EVjjnvnLO/RvwIdHCf2rJ/gCimR9WtAG2Ad/krx9WfKfePiWc/GHsM8lud2v6U2ffBaT+h7GJvD7/QPQDp4uT3V6P+nNxndv/HahOdrtb0596+1eR2h/GJvL6XFDn9g3AW8ludyv7MwhYGLvdmeilnnOaPHayO9eC/xiDY+9ifwbujW17gOjoEOBvgWeBrcA7wEXJbnMr+9OX6Lv4YaJ/mbyX7Da3sj8vA58BkdjPimS3uZX9eRR4L9aXV09VOFPhp6n+1Ns3pQt9gq/PQ7HXZ33s9fn7ZLe5lf0xopfX3gc2AgWJHFcRCCIiIZdu1+hFRKSZVOhFREJOhV5EJORU6EVEQk6FXkQk5FToJVRiaYU/auSxZmcemdn9ZtazkceWNvd4dZ67wMzat/Y4Iolok+wGiPjgZjO7BtjtnJtpZu8B/wz83sx+ApwFrHfO/dbM3gCWEY1jmAT8DTAd2A08FztekZl1AF5zzs2vfzIzu4noKtJvAP9EdHVsB+A4sMU5N9/Mfhs75hVE83B6AffEYmY7m9n02LYi59wnHv/3kAynEb2E0UvOuYlA31h+yyfOuRnAR0QHN58DP4nte9A5NxuoBK4murL6AefcROfcG7F9nnbOjSa6KrEh44iG5+0mmkAI0RXZpcBAM7sM2OmcuxfYHHs8AjzonNsEfOWcm0p0if7VHvRf5CQa0UuYxVcD7o/9Hgy875x70sxejW07HPv9FXA60ZWH9SOh489vbHXhX5xz98fvmNn9dY5b/3JR/Bh1zxE//hGif22IeEqFXsLoh2b2PaK5MyfqXJpfB8wwswuIfslDQ/4vcL+ZfQqsSPB8i8ysHPgL8LV4X+fcRjO7M3Z55nvAIeBNYJaZlSXcK5EWUgSCSADMrJBoyuDfOucmJLk5kmFU6EVEQk4fxoqIhJwKvYhIyKnQi4iEnAq9iEjIqdCLiITcfwLhNH/Z214exQAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from scipy.cluster.hierarchy import dendrogram\n",
    "from scipy.cluster.hierarchy import average, linkage\n",
    "from matplotlib import pyplot as plt\n",
    "import scipy\n",
    "import pandas as pd \n",
    "import scipy.spatial\n",
    "import scipy.cluster\n",
    "import json\n",
    "import matplotlib.pyplot as plt\n",
    "from functools import reduce\n",
    "from Bio.Phylo.TreeConstruction import _Matrix\n",
    "from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceMatrix\n",
    "import pylab\n",
    "from Bio import Phylo\n",
    "import sys\n",
    "\n",
    "# Loading data\n",
    "file_ = open('../phylo/_o', 'r').readlines()\n",
    "matrix_len = file_[0]\n",
    "b = np.ones((int(matrix_len),int(matrix_len)))\n",
    "min_ = 2.0\n",
    "for i in file_[1:]:\n",
    "    line = i.split('\\t')\n",
    "    c1 = int(line[0])\n",
    "    c2 = int(line[1])\n",
    "    \n",
    "    num = float(line[2])\n",
    "    if num < min_:\n",
    "        min_ = num\n",
    "    b[c1][c2] = num\n",
    "    b[c2][c1] = num\n",
    "\n",
    "\n",
    "X = b\n",
    "for i in range(0, len(X)):\n",
    "    X[i][i] = 0.0\n",
    "print (min_)\n",
    "\n",
    "s = ''\n",
    "names = []\n",
    "for i in open('../phylo/_o_sd'):\n",
    "\tline = i.split('\\t')\n",
    "\tnames.append(f'{line[3]}')\n",
    "print (names)\n",
    "labels = names\n",
    "\n",
    "# # this part for neighbour joining\n",
    "\n",
    "X_sub = []\n",
    "for i in range(0,len(X)):\n",
    "    X_sub_one = []\n",
    "    for j in range (0,i+1):\n",
    "        X_sub_one.append(X[i][j])\n",
    "    X_sub.append(X_sub_one)\n",
    "X = X_sub\n",
    "\n",
    "dm = DistanceMatrix(names=labels, matrix=X)\n",
    "constructor = DistanceTreeConstructor()\n",
    "tree = constructor.nj(dm)\n",
    "tree.ladderize()\n",
    "plt.rc('font', size=7)          # controls default text sizes #HERE IS THE SETTING FOR THAT ALLOWS ME TO HIDE THE BRANCH TIP LABELS\n",
    "plt.rc('axes', titlesize=50)     # fontsize of the axes title\n",
    "plt.rc('xtick', labelsize=10)    # fontsize of the tick labels\n",
    "plt.rc('ytick', labelsize=10)    # fontsize of the tick labels\n",
    "plt.rc('figure', titlesize=18)   # fontsize of the figure title\n",
    "print (list(tree.find_clades(terminal=True)))\n",
    "def label_func(node):\n",
    "    l = []\n",
    "    for i in list(tree.find_clades(terminal=True)):\n",
    "        l.append(i.name)\n",
    "    if str(node) in l:\n",
    "        return str(node)\n",
    "    return ''\n",
    "Phylo.draw(tree, do_show=False, show_confidence = False, label_func = label_func)\n",
    "\n",
    "# Phylo.draw_graphviz(tree)\n",
    "plt.savefig(\"TreeToCutOff.png\", format='png', dpi=1200)\n",
    "# pylab.show()\n",
    "print('1')"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "5ae984702f43814661ff05ca52b31c3911ddd290f7c3f2a70a4c897cde64189c"
  },
  "kernelspec": {
   "display_name": "Python 3.7.5 64-bit ('3.7.5': pyenv)",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  },
  "orig_nbformat": 2
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
